home *** CD-ROM | disk | FTP | other *** search
- Subject: v25i001: chop - extract selected fields or columns of text lines
- Newsgroups: comp.sources.unix
- Approved: vixie@pa.dec.com
-
- Submitted-by: George L Sicherman <gls@hrmso.att.com>
- Posting-number: Volume 25, Issue 1
- Archive-name: chop
-
- SYNOPSIS
- chop -flist [ -dseparators ] [ -b ] [ file ... ]
- chop -clist [ file ... ]
-
- DESCRIPTION
- Chop extracts selected fields or columns of lines from the
- specified files or the standard input, and writes them to
- the standard output. If you specify -f, chop extracts
- fields. If you specify -c, chop extracts columns.
-
- #! /bin/sh
- # This is a shell archive. Remove anything before this line, then unpack
- # it by saving it into a file and typing "sh file". To overwrite existing
- # files, type "sh file -c". You can also feed this as standard input via
- # unshar, or by typing "sh <file". If this archive is complete, you
- # will see the following message at the end:
- # "End of shell archive."
- # Contents: chop.1 chop.c Makefile
- # Wrapped by vixie@cognition.pa.dec.com on Mon Nov 25 13:18:00 1991
- PATH=/bin:/usr/bin:/usr/ucb ; export PATH
- if test -f 'chop.1' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'chop.1'\"
- else
- echo shar: Extracting \"'chop.1'\" \(2037 characters\)
- sed "s/^X//" >'chop.1' <<'END_OF_FILE'
- X.\" February 10, 1991.
- X.TH chop 1 Local
- X.UC 4
- X.SH NAME
- chop \- extract selected fields or columns of text lines
- X.SH SYNOPSIS
- X.B chop
- X.BR \-f list
- X[
- X.BR \-d separators
- X] [
- X.BR \-b
- X] [ file ... ]
- X.br
- X.B chop
- X.BR \-c list
- X[ file ... ]
- X.SH DESCRIPTION
- X.I Chop
- extracts selected fields or columns of lines from the specified files or the
- standard input,
- and writes them to the standard output.
- If you specify
- X.BR \-f ,
- X.I chop
- extracts fields.
- If you specify
- X.BR \-c ,
- X.I chop
- extracts columns.
- X.PP
- XFields and columns are numbered from 1.
- A list of fields or columns consists of one or more ranges
- separated by commas.
- A range is a single number or a minus sign with a number
- at one or both ends.
- An open range runs to the corresponding end of the line;
- for instance,
- X.B \-3
- is the same as
- X.BR 1\-3 ,
- and
- X.B 5\-
- means all fields or columns from 5 onward.
- An input line ends with a newline, a carriage return, or a form feed.
- X.PP
- XFor the
- X.B \-f
- option, the default input separator is white space, and the default
- output separator is the horizontal tab.
- Use the
- X.B \-d
- option to specify an alternate set of input separators;
- the first of them will be used on output.
- If you use the
- X.B \-d
- option, every occurrence of an input separator delimits a field.
- If you do not specify
- X.BR \-d ,
- leading white space in a line is normally ignored.
- To treat leading white space as a separator, specify
- X.BR \-b .
- X.PP
- XFor the
- X.B \-c
- option, a backspace character decrements the column,
- and a horizontal tab advances to the next standard 8-column tab stop.
- X.SH DIAGNOSTICS
- XExits with status 0 on success, 1 on invalid syntax,
- and 2 if it cannot read an argument file.
- X.SH NOTES
- X.I Chop
- is meant to improve on
- X.IR cut (1).
- It has no limit on input line width or backspacing,
- lets many characters be input separators at once,
- and accepts white space as an input separator.
- It is not wholly compatible with
- X.IR cut ;
- in particular, it processes
- X.I all
- input lines the same way.
- X.SH SEE ALSO
- cut(1), paste(1), awk(1)
- X.SH PROVIDER
- G. L. Sicherman (odyssey\^!\^gls)
- END_OF_FILE
- if test 2037 -ne `wc -c <'chop.1'`; then
- echo shar: \"'chop.1'\" unpacked with wrong size!
- fi
- # end of 'chop.1'
- fi
- if test -f 'chop.c' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'chop.c'\"
- else
- echo shar: Extracting \"'chop.c'\" \(5684 characters\)
- sed "s/^X//" >'chop.c' <<'END_OF_FILE'
- X/*
- X * chop - chop selected fields out of text.
- X * G. L. Sicherman. August 7, 1990. February 10, 1991.
- X *
- X * chop -cLIST [file ...]
- X * chop -fLIST [-dCHARS] [-b] [file ...]
- X *
- X * Chop writes the specified fields (-f) or columns (-c)
- X * to the standard output.
- X * Default output field separator is horizontal tab.
- X * Default field separator is white space;
- X * -dx... specifies characters x... as input field separators
- X * and the first x as output field separator.
- X * If the input separator is white space, leading white space
- X * is ignored unless you specify -b.
- X *
- X * Unlike cut(1), chop processes *all* input lines as specified,
- X * and a line can be as long as you like.
- X *
- X * Carriage returns, line feeds, and form feeds all act as
- X * line separators. Other control characters (other than
- X * separators) are treated as ordinary characters in fields.
- X * In columns, BS is -1 column, HT jumps to a standard
- X * 8-column tab stop.
- X *
- X * EXIT CODES: 0 on success;
- X * 1 on syntax errors;
- X * 2 on file access failures.
- X */
- X
- X#include <ctype.h>
- X#include <stdio.h>
- X#include <string.h>
- X
- char *calloc(); /* pre-ANSI declaration; chop.c does not include <stdlib.h> */
- X
- X#define INFINITY (-1) /* stored in rangetype.last for an open-ended range such as "5-" */
- X#define DEFOUTSEP '\t' /* not referenced anywhere else in chop.c */
- X
- struct rangetype { /* one inclusive range taken from a -c or -f list */
- X int first; /* lowest selected field or column number */
- X int last; /* highest selected number, or INFINITY */
- X};
- X
- static int bflag = 0; /* set to 1 by -b */
- static char *cflag = (char *)NULL; /* text following -c (the column list), if given */
- static char *dflag = (char *)NULL; /* text following -d (the separator set), if given */
- static char *fflag = (char *)NULL; /* text following -f (the field list), if given */
- static FILE *Input; /* file most recently opened by main() */
- static int nranges; /* number of entries filled in range[] */
- static char *progn; /* argv[0], used as the prefix of diagnostics */
- static struct rangetype *range; /* table allocated and filled by setupranges() */
- static int (*process)(); /* cprocess or fprocess, chosen in main() */
- static int cprocess(), fprocess();
- X
- X/*
- X * usage - write the two-line synopsis to stderr and exit with status 1.
- X */
- Xusage()
- X{
- X    fprintf(stderr,
- X        "usage: %s -cLIST [file ...]\n"
- X        " %s -fLIST [-dCHARS] [-b] [file ...]\n",
- X        progn, progn);
- X    exit(1);
- X}
- X
- X/*
- X * main - parse the command line, build the range table, then run the
- X * selected processor over each named file, or over the standard input
- X * when no file is named.
- X *
- X * Options must come first, and each option's value is the rest of the
- X * same argument (-c1-3, not -c 1-3).  Exactly one of -c and -f is
- X * required.
- X *
- X * Exit status: 0 on success, 1 on a syntax error, 2 if a file cannot
- X * be opened for reading.
- X */
- Xmain(argc, argv)
- Xint argc;
- Xchar **argv;
- X{
- X    progn = *argv;
- X    while (--argc) {
- X        /* The first argument not starting with '-' ends the options. */
- X        if ('-' != **++argv) break;
- X        switch (*++*argv) {
- X        case 'b':
- X            bflag = 1;
- X            break;
- X        case 'c':
- X            if (!*(cflag = ++*argv)) {
- X                fprintf(stderr,
- X                    "%s: column list required after -c\n", progn);
- X                exit(1);
- X            }
- X            break;
- X        case 'd':
- X            if (!*(dflag = ++*argv)) {
- X                fprintf(stderr,
- X                    "%s: character required after -d\n", progn);
- X                exit(1);
- X            }
- X            break;
- X        case 'f':
- X            if (!*(fflag = ++*argv)) {
- X                fprintf(stderr,
- X                    "%s: field list required after -f\n", progn);
- X                exit(1);
- X            }
- X            break;
- X        default:
- X            usage();
- X        }
- X    }
- X    /* Both lists given, or neither: the request is ambiguous or empty. */
- X    if (!fflag == !cflag) usage();
- X    if (cflag) {
- X        process = cprocess;
- X        setupranges(cflag);
- X    }
- X    else {
- X        process = fprocess;
- X        setupranges(fflag);
- X    }
- X    if (cflag && dflag) fprintf(stderr, "%s: -d option ignored with -c\n",
- X        progn);
- X    if (cflag && bflag) fprintf(stderr, "%s: -b option ignored with -c\n",
- X        progn);
- X    if (fflag && dflag && bflag) {
- X        fprintf(stderr, "%s: -b option ignored with -d\n", progn);
- X        bflag = 0;
- X    }
- X    if (argc) {
- X        /* argv now points at the first file name; argc counts them. */
- X        while (argc--) {
- X            char *name = *argv++;
- X
- X            Input = fopen(name, "r");
- X            if (!Input) {
- X                fprintf(stderr, "%s: cannot read %s\n", progn, name);
- X                exit(2);
- X            }
- X            (*process)(Input);
- X            fclose(Input);
- X        }
- X    }
- X    else {
- X        (*process)(stdin);
- X    }
- X    /*
- X     * Return explicitly: falling off the end of main() left the exit
- X     * status undefined, although 0 is the documented success status.
- X     */
- X    return 0;
- X}
- X
- X/*
- X * chop -c
- X */
- static int incol, outcol;
- X
- X/*
- X * cprocess - copy the selected columns of every line of stream to stdout.
- X *
- X * incol is the column reached in the input and outcol the column reached
- X * in the output; both restart at 0 on each line.  A backspace moves incol
- X * left (never below 0) and a tab moves it to the next multiple of 8;
- X * catchup() later brings outcol back in step.  Newline, carriage return
- X * and form feed end a line and are always copied.
- X *
- X * Returns 0 at end of input.
- X */
- Xstatic int
- Xcprocess(stream)
- XFILE *stream;
- X{
- X    int k;
- X
- X    incol = outcol = 0;
- X    for (;;) {
- X        switch (k = getc(stream)) {
- X        case EOF:
- X            /* A bare "return;" is invalid in a function returning int. */
- X            return 0;
- X        case '\b':
- X            if (incol > 0) incol--;
- X            break;
- X        case '\t':
- X            incol = (incol + 8) & ~7;
- X            break;
- X        case '\n':
- X        case '\r':
- X        case '\f':
- X            catchup();
- X            putchar(k);
- X            incol = outcol = 0;
- X            break;
- X        default:
- X            catchup();
- X            /* Columns are numbered from 1, hence the pre-increment. */
- X            if (wantit(++outcol)) putchar(k);
- X            incol++;
- X        }
- X    }
- X}
- X
- X/*
- X * catchup - move the output column to the input column, writing a space
- X * (moving right) or a backspace (moving left) for each selected column
- X * that is crossed.
- X */
- Xcatchup()
- X{
- X    while (outcol < incol) {
- X        outcol++;
- X        if (wantit(outcol)) {
- X            putchar(' ');
- X        }
- X    }
- X    for (; outcol > incol; outcol--) {
- X        if (wantit(outcol)) {
- X            putchar('\b');
- X        }
- X    }
- X}
- X
- X/*
- X * chop -f
- X */
- X
- X/*
- X * fprocess - copy the selected fields of every line of stream to stdout.
- X *
- X * Input separators are the characters of dflag, or tab and space when -d
- X * was not given; the first of them is written between output fields.
- X * Without -d a run of separators counts as one, and leading separators
- X * on a line are skipped unless -b was given.  Newline, carriage return
- X * and form feed end a line and are always copied.
- X *
- X * A NUL byte in the input is an ordinary field character: strchr()
- X * matches the terminator of its first argument, so each lookup is
- X * guarded to keep NUL from acting as a separator or line terminator.
- X *
- X * Returns 0 at end of input.
- X */
- Xstatic int
- Xfprocess(stream)
- XFILE *stream;
- X{
- X    int field, go;
- X    int k;
- X    int later;
- X    char *seps;
- X    int want;
- X
- X    field = later = 0;
- X    seps = dflag ? dflag : "\t ";
- X    for (;;) {
- Xnewfield:
- X        field++;
- X        go = 0;                 /* no character of this field seen yet */
- X        want = wantit(field);
- X        /* later counts selected fields already started on this line. */
- X        if (want && later++) putchar(*seps);
- X        for (;;) {              /* process one field */
- X            k = getc(stream);
- X            if (EOF == k) return 0;
- X            if (k != '\0' && strchr(seps, k)) {
- X                if (field == 1 && !go
- X                    && !dflag && !bflag) continue;
- X                else break;
- X            }
- X            if (k != '\0' && strchr("\n\r\f", k)) {
- X                putchar(k);
- X                field = later = 0;
- X                goto newfield;  /* i.e., break 2 */
- X            }
- X            go = 1;
- X            if (want) putchar(k);
- X        }
- X/*
- X * If white space, skip it.
- X */
- X        if (!dflag) for (;;) {
- X            k = getc(stream);
- X            if (EOF == k) {
- X                /*
- X                 * NOTE(review): this writes a separator when the input
- X                 * ends in white space with no line terminator; kept
- X                 * as in the original -- confirm it is intended.
- X                 */
- X                putchar(*seps);
- X                return 0;
- X            }
- X            if (k != '\0' && strchr(seps, k)) continue;
- X            ungetc(k, stream);
- X            break;
- X        }
- X    }
- X}
- X
- X/*
- X * wantit - return 1 if field or column number f lies inside at least one
- X * entry of range[], otherwise 0.  An entry whose last is INFINITY has no
- X * upper bound.
- X */
- Xint
- Xwantit(f)
- Xint f;
- X{
- X    int idx = 0;
- X
- X    while (idx < nranges) {
- X        struct rangetype *cur = &range[idx++];
- X
- X        if (cur->first > f) {
- X            continue;
- X        }
- X        if (cur->last == INFINITY || cur->last >= f) {
- X            return 1;
- X        }
- X    }
- X    return 0;
- X}
- X
- X/*
- X * setupranges - allocate range[] and fill it from a comma-separated list.
- X *
- X * The table is sized from the number of commas in list, then nranges is
- X * reset to 0 and onerange() appends one entry per list item (exiting with
- X * status 1 on a malformed item).  If the table cannot be allocated the
- X * program exits with status 2, the status already used for the other
- X * non-syntax failure.
- X */
- Xsetupranges(list)
- Xchar *list;
- X{
- X    char *cursor;
- X/*
- X * How many commas?
- X */
- X    nranges = 1;
- X    for (cursor = list; *cursor; cursor++)
- X        if (',' == *cursor) nranges++;
- X    /*
- X     * calloc() is declared without a prototype, so pass the count with
- X     * the width the library expects rather than as a plain int.
- X     */
- X    range = (struct rangetype *)calloc((size_t)nranges,
- X        sizeof(struct rangetype));
- X    if (!range) {
- X        fprintf(stderr, "%s: out of memory\n", progn);
- X        exit(2);
- X    }
- X    nranges = 0;
- X    for (cursor = list; cursor; ) {
- X        onerange(cursor);
- X        /* Step to the item after the next comma, if there is one. */
- X        cursor = strchr(cursor, ',');
- X        if (cursor) cursor++;
- X    }
- X}
- X
- X/*
- X * onerange - parse the list item starting at r and append it to range[].
- X *
- X * Accepted forms, each ended by a comma or the end of the string:
- X *     N      first = last = N
- X *     N-     first = N, last = INFINITY
- X *     -M     first = 1, last = M
- X *     N-M    first = N, last = M
- X *     -      first = 1, last = INFINITY
- X * Any other character is reported on stderr and the program exits with
- X * status 1.
- X *
- X * Characters are converted to unsigned char before isdigit(), whose
- X * behavior is undefined for negative values other than EOF.
- X */
- Xonerange(r)
- Xchar *r;
- X{
- X    if (isdigit((unsigned char)*r)) {
- X        range[nranges].first = atoi(r);
- X        while (isdigit((unsigned char)*r)) r++;
- X        if (',' == *r || !*r) {
- X            /* A lone number selects exactly that field or column. */
- X            range[nranges].last = range[nranges].first;
- X            nranges++;
- X            return;
- X        }
- X        if ('-' != *r) goto bad;
- X        r++;
- X    }
- X    else if ('-' == *r) {
- X        /* No lower bound: the range starts at 1. */
- X        range[nranges].first = 1;
- X        r++;
- X    }
- X    else goto bad;
- X    if (',' == *r || !*r) {
- X        /* No upper bound: the range runs to the end of the line. */
- X        range[nranges++].last = INFINITY;
- X        return;
- X    }
- X    if (!isdigit((unsigned char)*r)) goto bad;
- X    range[nranges++].last = atoi(r);
- X    while (isdigit((unsigned char)*r)) r++;
- X    if (!*r || ',' == *r) return;
- Xbad:
- X    /* r points at the offending character on every path to here. */
- X    fprintf(stderr, "%s: bad character '%c' in field list\n",
- X        progn, *r);
- X    exit(1);
- X}
- END_OF_FILE
- if test 5684 -ne `wc -c <'chop.c'`; then
- echo shar: \"'chop.c'\" unpacked with wrong size!
- fi
- # end of 'chop.c'
- fi
- if test -f 'Makefile' -a "${1}" != "-c" ; then
- echo shar: Will not clobber existing file \"'Makefile'\"
- else
- echo shar: Extracting \"'Makefile'\" \(291 characters\)
- sed "s/^X//" >'Makefile' <<'END_OF_FILE'
- BINDEST=$$HOME/lubin/chop
- MANDEST=$$HOME/luman/chop.1
- X
- CFLAGS = -O
- chop : chop.o
- X cc -o chop chop.o
- install : chop chop.1
- X cp chop $(BINDEST)
- X cp chop.1 $(MANDEST)
- shar : chop.shar
- chop.shar : Makefile chop.1 chop.c
- X shar Makefile chop.1 chop.c > chop.shar
- clean :
- X rm -f chop chop.shar *.o
- END_OF_FILE
- if test 291 -ne `wc -c <'Makefile'`; then
- echo shar: \"'Makefile'\" unpacked with wrong size!
- fi
- # end of 'Makefile'
- fi
- echo shar: End of shell archive.
- exit 0
-